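/*

This do-file:
  1. stacks the per-industry energy productivity CSVs into one data set,
  2. trims outliers and merges the productivities (and production function
     parameters) into the main combined data,
  3. merges in state electricity prices from SEDS, deflated to 2011 dollars.

*/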

clear all
set maxvar 10000


* Stack every productivity CSV (four digit, 4 moments) into a single .dta.
* The stacked file is rewritten after each CSV, so on every pass after the
* first we append what has been accumulated so far to the freshly imported CSV.
local csv_dir "/projects/programs/codePYTHON/Production_Function_Estimation/Productivity_by_Industry/"
local stacked "/projects/data/dataSTATA/combined/productivity_4digit_4m.dta"

local csv_list : dir "`csv_dir'" files "*.csv"

local n_done = 0
foreach csv of local csv_list {
	import delimited "`csv_dir'`csv'", clear

	* quick look at which industries this file contains
	tab naicscode4

	* nothing to append on the first pass: the stacked file is (re)created below
	if `n_done' > 0 {
		append using "`stacked'"
	}
	sa "`stacked'", replace
	local ++n_done
}


* ---- build the trimmed productivity file (one row per lbdnum-year) ----

* production function parameters, keyed by naicscode4; kept in a tempfile
* because they are merged again further down
import delimited "/projects/programs/codePYTHON/Production_Function_Estimation/static_estimates_by_industry_full.csv", clear
drop v1
* ren naics bestnaics
ren naics naicscode4
tempfile pf_param
sa `pf_param', replace

* stacked productivities; every row must find its industry parameters
use "/projects/data/dataSTATA/combined/productivity_4digit_4m.dta", clear
merge m:1 naicscode4 using `pf_param', assert(3) keep(3) nogen

* levels from logs, plus the energy productivity rescaled by 1-sig
gen oe = exp(we)
gen oh = exp(wh)
gen we_scale = we /(1-sig)
gen oe_scale = exp(we_scale)
la var we_scale "log energy productivity scaled by 1-sig"
la var oe_scale "energy productivity level scaled by 1-sig"


* trim obvious outliers: drop rows more than 100x the median, one variable at
* a time (each median is computed on the rows that survived the previous cut).
* Stata treats missing as larger than any number, so rows where the variable
* is missing are dropped here as well.
foreach lvl in oh oe oe_scale {
	sum `lvl', detail
	drop if `lvl' > 100 * `r(p50)'
}


keep lbdnum year we oe wh oh oe_scale we_scale

sa "/projects/data/dataSTATA/combined/productivity_trim_4digit_4m.dta", replace

* ---- merge productivities into the main data and fill the gaps ----

use "/projects/data/dataSTATA/combined/combined_model_energy.dta", clear

* rows of the main data without a (trimmed) productivity are kept; the
* productivity file may not contain rows that are absent from the main data
merge 1:1 lbdnum year using "/projects/data/dataSTATA/combined/productivity_trim_4digit_4m.dta", assert(1 3) keep(1 3) nogen

la var we "log of energy productivity"
la var oe "energy productivity"

* merge with pf func parameters
merge m:1 naicscode4 using `pf_param', assert(3) keep(3) nogen

* rebuild the entrant flag: 1 in the observation's first year, 0 otherwise
drop entrant
gen entrant = (year == firstyear)

* Fill missing productivities with the bestnaics mean of the log variable;
* the matching level variable is filled with exp() of that same mean.
* The mean_* helper variables stay in the saved data set.
local log_vars   wh we we_scale
local level_vars oh oe oe_scale
forvalues k = 1/3 {
	local lg : word `k' of `log_vars'
	local lv : word `k' of `level_vars'

	bys bestnaics (year): egen mean_`lg' = mean(`lg')
	replace `lg' = mean_`lg' if mi(`lg')
	replace `lv' = exp(mean_`lg') if mi(`lv')
}

sa "/projects/data/dataSTATA/combined/combined_productivity_energy_4digit_4m.dta", replace

*>------------- merge in SEDS prices -------------------------------------------<

* Step 1: yearly energy price deflator (base year 2011), used below to put
* the SEDS prices in 2011 dollars.
import delimited "/projects/dstafftransfer/transfer.20200428/naics5811.csv", clear

* Only the energy deflator (pien) is needed: the collapse below keeps nothing
* but pien_dfltr by year, so the shipments / materials / investment deflators
* that used to be built here were discarded immediately and are no longer computed.
* pien_dfltr = (industry's 2011 value of pien) / (pien in the current year)
gen pien_n = pien if year == 2011
bys naics: egen pien_2011 = mean(pien_n)
gen pien_dfltr = pien_2011 / pien
drop pien_n pien_2011

* for energy prices in electricity sector, we will use average of energy prices deflators in our industries (referenced to GSW)
collapse (mean) pien_dfltr, by(year)
ren pien_dfltr pien_avg
tempfile energy_deflators
sa `energy_deflators', replace

* Step 2: state-by-year electricity prices from the SEDS price file
import delimited "/projects/dstafftransfer/transfer.20200428/pr_all.csv", varnames(1) encoding(ISO-8859-2) clear

* only keep electricity prices
keep if msn == "ESICD"
drop data_status msn

* wide (one v# column per year) -> long (one row per state-year);
* the column number is turned into a calendar year by adding 1966
* (presumably the first year column is v4 = 1970 -- verify against the raw file)
reshape long v, i(state) j(year)
replace year = year + 1966
ren v seds_price

* convert from $ / btu to $ / kwh - done in a secondary file for disclosure purposes
* (absolute path, like every other path in this file, so the call does not
* depend on the current working directory)
do "/projects/programs/codeSTATA/A_cleaning/6_merge_productivity_unitsconversion.do"

* attach the yearly energy deflator; SEDS years without a deflator are kept
* (their deflated price becomes missing), deflator years without prices are dropped
merge m:1 year using `energy_deflators', keep(1 3) nogen

*deflate to 2011
replace seds_price = seds_price * pien_avg

la var seds_price "state avg elec price from SEDS (2011 $/kwh)"
ren state postalst
gen lseds_price = log(seds_price)
la var lseds_price "log of seds price"
drop pien_avg
sa "/projects/data/dataSTATA/economic/seds_elec_prices.dta", replace

* Step 3: the same price series, re-keyed by firstyear so it can be looked up
* a second time as the price in the entry year.
* The data in memory at this point is the state-year price file saved just above.
ren year firstyear
ren seds_price seds_price_init
la var seds_price_init "state avg entry year elec price, SEDS (2011 $/kwh)"
gen lseds_price_init = log(seds_price_init)
la var lseds_price_init "log of seds_price_init"

* lseds_price is the contemporaneous log price and does not belong in the
* entry-year lookup: left in, it would collide with the master's lseds_price
* in the second merge below (harmless only because merge keeps master values
* by default).
drop lseds_price

tempfile firstyearseds
sa `firstyearseds', replace


use "/projects/data/dataSTATA/combined/combined_productivity_energy_4digit_4m.dta", clear

* every observation must find a price for its state in both the current
* year and its first year; state-years that match nothing are discarded
merge m:1 postalst year using "/projects/data/dataSTATA/economic/seds_elec_prices.dta", assert(2 3) keep(3) nogen
merge m:1 postalst firstyear using `firstyearseds', assert(2 3) keep(3) nogen

sa "/projects/data/dataSTATA/combined/combined_productivity_energy_4digit_4m.dta", replace
